"""
auth:xgt-python
datetime:2021/11/12
爬取抖音小姐姐无水印
"""

# 导入响应的模块
import os
import re
from urllib.parse import unquote

import requests


# Page of the video to download.
url = 'https://www.douyin.com/video/7003557916838006057'

# Request headers sent with the page request.
# NOTE(review): the cookie below is a hard-coded browser session value; it is
# presumably short-lived and should be loaded from configuration rather than
# committed to source -- confirm before reuse.
headers = {
    'cookie': 'douyin.com; __ac_nonce=0618e5fa500949f87754e; __ac_signature=_02B4Z6wo00f01FVZ5SAAAIDB3hJ-wJrN-bBVeeGAAHTf11; ttcid=eb9dc278c0cc45bea83e225b49f4446021; ttwid=1%7CAlp8E8Y7GuGpgvuiR0LXh4HovlP0-0PU-De89YfkYGg%7C1636720550%7C6b88145dff7c0dd31956fff838b5f1e3dd2e91c3fcf3558464df95553cb23c7c; _tea_utm_cache_6383=undefined; douyin.com; MONITOR_WEB_ID=c628df0c-7e51-4132-a3ed-7f1992b29fb5; s_v_web_id=verify_kvwd5pvy_XWJRjS6v_ajSo_4BLP_B5oi_WKqszcfH0WXo; passport_csrf_token_default=c22658c5698805c2a72c6225ec226cfa; passport_csrf_token=c22658c5698805c2a72c6225ec226cfa; _tea_utm_cache_1300=undefined; odin_tt=107b7db510cdcfe6c3dfe912959e7b47e89e79244fd6595bb7e4386513f851098ec17b8cc0c653d8426b997a52fbeac61ffbc1ca9a8907c89b6c2e4c7d923cb4; msToken=bPEz0EBZZm5bOmhLcwAfj_bIYvtgg8tkHmwTK5y_jFkN62JGB13jXw3uRTip4Le_8UXRpCsgq5QfM-xjSyiGSYqQNufeRrVF2WJPH5irj7Z2J-kkSUp-xIA=; tt_scid=JFkYRDTdWE2tR5ky7xXoI0Rbs7.J37JjmqO7NSLmqYZ2EdzznWLFDsZ41pdTFaxnedc8; msToken=-YO5HFEXED-GewaF-XJG20ZbauSMuYuXL2CHQH1nF9KeHd2VkbWZBPQxW7Muxxe4y5z3nhnYbK0NjCVMVp0EcAE92j5LYqZQhRl8lk1fWFHylM8XQnaqkkRJ',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36',
}

# Seconds to wait on each HTTP request before giving up (requests has no
# default timeout and would otherwise block forever on a stalled connection).
REQUEST_TIMEOUT = 30

# Number of bytes written to disk per streamed chunk.
CHUNK_SIZE = 64 * 1024

# Directory the downloaded video is stored in (relative to the working dir).
OUTPUT_DIR = 'video'

# `.*?` is non-greedy and does not cross newlines.
TITLE_PATTERN = re.compile(r'<title data-react-helmet="true"> (.*?)</title>')
VIDEO_SRC_PATTERN = re.compile(r'src(.*?)vr%3D%2')

# Characters that are rejected in Windows file names, plus control characters.
INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def extract_title(html):
    """Return the text of the first react-helmet ``<title>`` tag in *html*.

    Raises:
        ValueError: if the page contains no matching ``<title>`` tag.
    """
    match = TITLE_PATTERN.search(html)
    if match is None:
        raise ValueError('video title not found in page')
    return match.group(1)


def extract_video_url(html):
    """Return the decoded video URL embedded in *html*.

    The second ``src ... vr%3D%2`` occurrence is used, exactly as the original
    script did. The captured text is percent-decoded and its leading ``":"``
    is swapped for the ``https:`` scheme.

    Raises:
        ValueError: if fewer than two candidate sources are present.
    """
    candidates = VIDEO_SRC_PATTERN.findall(html)
    if len(candidates) < 2:
        raise ValueError(
            'video source not found in page '
            '(expected at least 2 matches, got %d)' % len(candidates)
        )
    return unquote(candidates[1]).replace('":"', 'https:')


def sanitize_filename(name, fallback='video'):
    """Return *name* made safe for use as a single file-name component.

    Path separators, characters forbidden on Windows and control characters
    are replaced with ``_``; surrounding spaces and dots are stripped. If
    nothing is left, *fallback* is returned.
    """
    cleaned = INVALID_FILENAME_CHARS.sub('_', name).strip(' .')
    return cleaned or fallback


def download_video(video_url, destination, timeout=REQUEST_TIMEOUT):
    """Stream *video_url* to the file *destination*.

    The body is written to ``destination + '.part'`` first and moved into
    place only after the whole download succeeded, so a failed transfer never
    leaves a truncated ``.mp4`` behind.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    partial_path = destination + '.part'
    try:
        with requests.get(url=video_url, stream=True, timeout=timeout) as reply:
            reply.raise_for_status()
            with open(partial_path, mode='wb') as file:
                for chunk in reply.iter_content(chunk_size=CHUNK_SIZE):
                    if chunk:
                        file.write(chunk)
        os.replace(partial_path, destination)
    finally:
        # Only present here when the download or the move failed.
        if os.path.exists(partial_path):
            os.remove(partial_path)


def main():
    """Fetch the page at ``url``, locate its video and save it under ``video/``."""
    response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of regex-matching an error page.
    response.raise_for_status()

    title = extract_title(response.text)
    video_url = extract_video_url(response.text)
    print(video_url)

    # The original assumed the directory already existed.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    destination = os.path.join(OUTPUT_DIR, sanitize_filename(title) + '.mp4')
    download_video(video_url, destination)
    print(title, video_url)


if __name__ == '__main__':
    main()